IMPORT
library(ggplot2)
library(tidyverse)
library(dplyr)
library(harmony)
library(Seurat)
# Import seurat object
seur.human <- readRDS("~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/raw_data/human_data/filtered_seurat_Harmony_07-22-22.RDS")
# Sanity check
print(seur.human) # 79,801 cells (it's the whole seurat object)
## An object of class Seurat
## 34778 features across 79801 samples within 2 assays
## Active assay: SCT (17389 features, 3000 variable features)
## 1 other assay present: RNA
## 3 dimensional reductions calculated: pca, umap, harmony
# Quick visualization
DimPlot(seur.human)

DEFINE FUNCTIONS
Preprocess <- function(seuratobject, SCTsplit=T, res=0.5, harmony_vars= c("Method", "Batch"), printComputation=T){
# Extract the counts data (from RNA assay) and metadata
counts <- GetAssayData(object = seuratobject, slot = "counts", assay="RNA")
metadata <- seuratobject@meta.data
metadata[, c("nCount_SCT", "nFeature_SCT", "SCT_snn_res.0.7", "seurat_clusters", "SCT_snn_res.0.9", "SCT_snn_res.1")] <- NULL # remove some cluster columns to avoid any confusion
colnames(metadata)[14] <- "clusters_bigUMAP" # rename "new_clusters" column
# Create new seurat object with only the counts and the "cleaned" metadata
initseurat <- CreateSeuratObject(counts = counts, meta.data = metadata, min.cells=20) # keep only genes expressed in at least 20 cells
cat("Initial seurat object:\n")
print(initseurat) # this is for sanity check
if(SCTsplit==T){
cat("\n1-Perform SCTransform on split data\n")
# Normalize with SCTransform by splitting object (per batch)
seur.list <- SplitObject(initseurat, split.by = "orig.ident")
for(i in 1:length(seur.list)){
cat("...SCTransform on", names(seur.list)[i], "\n")
seur.list[[i]] <- SCTransform(seur.list[[i]], vars.to.regress = 'percent.mt', verbose=printComputation)
}
# seur.list <- lapply(X = seur.list, FUN = SCTransform, vars.to.regress="percent.mt", method="glmGamPois") # not working
sct.hvg <- SelectIntegrationFeatures(object.list = seur.list, nfeatures = 3000)
seur.SCT <- merge(seur.list[[1]], y = unlist(seur.list[-1], use.names=FALSE), merge.data = TRUE)
VariableFeatures(seur.SCT) <- sct.hvg
}
else if(SCTsplit==F){
cat("\n1-Perform SCTransform on merged data")
# Normalize with SCTransform (without splitting object)
seur.SCT <- SCTransform(initseurat,
assay="RNA",
new.assay.name="SCT",
vars.to.regress = c('percent.mt'),
verbose=printComputation)
}
# Run PCA (don't scale data when using SCTransform)
cat("\n2-Run PCA pre-integration")
seur.SCT <- RunPCA(object = seur.SCT, assay = "SCT", npcs = 50, verbose=printComputation)
p1 <- DimPlot(seur.SCT, reduction = "pca", group.by = "orig.ident") + ggtitle("PCA pre-integration")
ElbowPlot(seur.SCT)
cat("\nSeurat object pre-integration:\n")
print(seur.SCT)
# Run Harmony for integration
cat("\n3-Run Harmony\n")
seur.harmony <- RunHarmony(object = seur.SCT,
assay.use = "SCT",
reduction = "pca",
dims.use = 1:50,
group.by.vars = harmony_vars,
plot_convergence = printComputation)
cat("\nSeurat object post-integration:\n")
print(seur.harmony)
cat("\n4-Run UMAP & clustering\n")
seur.harmony <- RunUMAP(object = seur.harmony, dims = 1:15, assay = "SCT", reduction = "harmony", verbose=printComputation)
seur.harmony <- FindNeighbors(object = seur.harmony, dims = 1:15, assay = "SCT", reduction = "harmony", verbose=printComputation) %>%
FindClusters(resolution = res, verbose=printComputation)
# Show PCA after integration
p2 <- DimPlot(seur.harmony, reduction = "harmony", group.by = "orig.ident") + ggtitle("PCA post-integration")
print(p1 | p2)
return(seur.harmony)
}
THYMIC MAIT CELLS
1. Preprocessing
# Isolate MAIT cells
MAIT_Thymus <- subset(seur.human, ident = "MAIT_Thymus") # 4689 cells
# Quick QC
FeatureScatter(MAIT_Thymus, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Batch")

FeatureScatter(MAIT_Thymus, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Batch")

# PREPROCESS
MAIT_SCTsplit <- Preprocess(seuratobject = MAIT_Thymus, SCTsplit = T, res = 0.3, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10772 features across 4689 samples within 1 assay
## Active assay: RNA (10772 features, 0 variable features)
##
## 1-Perform SCTransform on split data
## ...SCTransform on MAIT_1_Thymus
## ...SCTransform on MAIT_2_Thymus
## ...SCTransform on MAIT_3_Thymus
##
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering

MAIT_SCTmerged <- Preprocess(seuratobject = MAIT_Thymus, SCTsplit = F, res = 0.3, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10772 features across 4689 samples within 1 assay
## Active assay: RNA (10772 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 21544 features across 4689 samples within 2 assays
## Active assay: SCT (10772 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering

2. Visualize
# Sanity checks
# DimPlot(seur.harmony, dims = 1:2, group.by = "orig.ident")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Sex")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Donor")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Batch")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Method")
# Look at clusters
DimPlot(MAIT_SCTsplit, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.3", label = T) + ggtitle("SCT on split data")

DimPlot(MAIT_SCTmerged, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.3", label = T) + ggtitle("SCT on merged data")

THYMIC NKT CELLS
1. Preprocessing
# Isolate NKT cells
NKT_Thymus <- subset(seur.human, ident = "NKT_Thymus") # 2551 cells
# Quick QC
FeatureScatter(NKT_Thymus, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Batch")

FeatureScatter(NKT_Thymus, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Batch")

# PREPROCESS
NKT_SCTsplit <- Preprocess(seuratobject = NKT_Thymus, SCTsplit = T, res = 0.4, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10005 features across 2551 samples within 1 assay
## Active assay: RNA (10005 features, 0 variable features)
##
## 1-Perform SCTransform on split data
## ...SCTransform on NKT_1_Thymus
## ...SCTransform on NKT_2_Thymus
## ...SCTransform on NKT_3_Thymus
##
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering

NKT_SCTmerged <- Preprocess(seuratobject = NKT_Thymus, SCTsplit = F, res = 0.4, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 10005 features across 2551 samples within 1 assay
## Active assay: RNA (10005 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 20010 features across 2551 samples within 2 assays
## Active assay: SCT (10005 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering

2. Visualize
# Sanity checks
# DimPlot(seur.harmony, dims = 1:2, group.by = "orig.ident")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Sex")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Donor")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Batch")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Method")
# Look at clusters
DimPlot(NKT_SCTsplit, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.4", label = T) + ggtitle("SCT on split data") |
DimPlot(NKT_SCTmerged, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.4", label = T) + ggtitle("SCT on merged data")

THYMIC GD T CELLS
1. Preprocessing
# Isolate gdT cells
gdT_Thymus <- subset(seur.human, ident = "GD_Thymus") # 2981 cells
# Quick QC
FeatureScatter(gdT_Thymus, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by="Batch")

FeatureScatter(gdT_Thymus, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Batch")

# PREPROCESS
gdT_SCTsplit <- Preprocess(seuratobject = gdT_Thymus, SCTsplit = T, res = 0.4, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 11860 features across 2981 samples within 1 assay
## Active assay: RNA (11860 features, 0 variable features)
##
## 1-Perform SCTransform on split data
## ...SCTransform on GD_1_Thymus
## ...SCTransform on GD_2_Thymus
##
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering

gdT_SCTmerged <- Preprocess(seuratobject = gdT_Thymus, SCTsplit = F, res = 0.4, harmony_vars = c("Sex", "Method"), printComputation = F)
## Initial seurat object:
## An object of class Seurat
## 11860 features across 2981 samples within 1 assay
## Active assay: RNA (11860 features, 0 variable features)
##
## 1-Perform SCTransform on merged data
## 2-Run PCA pre-integration
## Seurat object pre-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 1 dimensional reduction calculated: pca
##
## 3-Run Harmony
##
## Seurat object post-integration:
## An object of class Seurat
## 23720 features across 2981 samples within 2 assays
## Active assay: SCT (11860 features, 3000 variable features)
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, harmony
##
## 4-Run UMAP & clustering

2. Visualize
# Sanity checks
# DimPlot(seur.harmony, dims = 1:2, group.by = "orig.ident")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Sex")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Donor")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Batch")
# DimPlot(seur.harmony, dims = 1:2, group.by = "Method")
# Look at clusters
DimPlot(gdT_SCTsplit, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.4", label = T) + ggtitle("SCT on split data") |
DimPlot(gdT_SCTmerged, dims = 1:2, reduction = "umap", group.by = "SCT_snn_res.0.4", label = T) + ggtitle("SCT on merged data")
